{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# The KLT Tracker\n",
    "*By Erik Gärtner*\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.2.2 Inverse-additive scheme\n",
    "\n",
    "The inverse-additive shceme is the following:\n",
    "\\begin{align}\n",
    "E(u, v) = \\sum_{x,y} [J(x, y) - I(x - u, y -v)] ^2\n",
    "\\end{align}\n",
    "\n",
    "Which is derived into the form of $Zd = e$, where $Z$ is the Hessian matrix.\n",
    "\n",
    "\\begin{align}\n",
    "\\sum_{x,y} \n",
    "\\begin{bmatrix}\n",
    "I_x^2 & I_x I_y \\\\\n",
    "I_x I_y & I_y^2 \\\\\n",
    "\\end{bmatrix} \n",
    "\\begin{bmatrix}\n",
    "u \\\\\n",
    "v\n",
    "\\end{bmatrix} = - \\sum_{x,y}\n",
    "\\begin{bmatrix}\n",
    "I_x D \\\\\n",
    "I_y D\n",
    "\\end{bmatrix}\n",
    "\\end{align}\n",
    "\n",
    "where D is $D(x,y) = J(x,y) - I(x - u,y -v)$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Helper functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from  scipy import ndimage, interpolate\n",
    "\n",
    "from matplotlib.pyplot import imshow, imread\n",
    "import matplotlib.pyplot as plt\n",
    "from PIL import Image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_img(path, gray=True):\n",
    "    rgb = imread(path)\n",
    "    # Alternative: rgb = Image.open(path)\n",
    "    rgb = np.array(rgb, dtype=np.float32)\n",
    "    if gray:\n",
    "        return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])\n",
    "    else:\n",
    "        return rgb\n",
    "\n",
    "\n",
    "def show_img(img):\n",
    "    imshow(img, cmap = plt.get_cmap('gray'))\n",
    "\n",
    "\n",
    "I = load_img('./images/chase0.png')\n",
    "J = load_img('./images/chase1.png')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "### 1.3.1 Gradient Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sobel filter\n",
    "#X_FILTER = np.array([[1.0, 0.0, -1.0], [2., 0., -2.], [1.0, 0.0, -1.0]])\n",
    "# Sharr filter\n",
    "X_FILTER = np.array([[3.0, 0.0, -3.0], [10.0, 0.0, 10.0], [3.0, 0.0, -3.0]])\n",
    "Y_FILTER = X_FILTER.T\n",
    "\n",
    "def differentiate(img_arr):\n",
    "    x_diff = ndimage.convolve(img_arr, X_FILTER)\n",
    "    y_diff = ndimage.convolve(img_arr, Y_FILTER)\n",
    "    \n",
    "    # x_diff, y_diff = np.gradient(img_arr)\n",
    "    \n",
    "    return (x_diff, y_diff)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.3.2 Estimating Z\n",
    "Z is the Hessian matrix."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def estimate_Z(x_diff, y_diff):\n",
    "    x_diff = x_diff.flatten()\n",
    "    y_diff = y_diff.flatten()\n",
    "    \n",
    "    grads_T = np.array([x_diff, y_diff])\n",
    "    Z = np.dot(grads_T, grads_T.T)\n",
    "    return Z"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.3.3 Difference Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def estimate_e(I, J, Ix, Iy):   \n",
    "    D = (J - I).flatten()\n",
    "    Ix = Ix.flatten()\n",
    "    Iy = Iy.flatten()\n",
    "    \n",
    "    grads_T = np.array([Ix, Iy])\n",
    "    \n",
    "    return -1 * np.dot(grads_T, D)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.3.4 Interpolation Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def interpolate_region(img_array):\n",
    "    \"\"\"\n",
    "    Creates an bilinear interpolation of the image area sent in.\n",
    "    Evaluate using spline(x, y)\n",
    "    \"\"\"\n",
    "    x_arr = np.arange(0, img_array.shape[1])\n",
    "    y_arr = np.arange(0, img_array.shape[0])\n",
    "    interp = interpolate.interp2d(x_arr, y_arr, img_array, kind='linear')\n",
    "    #spline = interpolate.RectBivariateSpline(x_arr, y_arr, img_array, kx=1, ky=1)\n",
    "    return interp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.3.5 Finalizing the KLT Tracker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_d(I, J, x, y, win_size, max_iter, min_disp):\n",
    "    \n",
    "    it = 0\n",
    "    d_tot = np.array([0., 0.]).T\n",
    "    \n",
    "    # The window to evaluate\n",
    "    win_x = np.arange(x, x + win_size[0], dtype=float)\n",
    "    win_y = np.arange(y, y + win_size[1], dtype=float)\n",
    "    \n",
    "    template = I[x:x + win_size[0], y: y + win_size[1]]\n",
    "    \n",
    "    # Find image gradient in I\n",
    "    Ix, Iy = differentiate(template)\n",
    "    \n",
    "    Z = estimate_Z(Ix, Iy)\n",
    "    print(Z)\n",
    "    Zinv = np.linalg.inv(Z)\n",
    "    \n",
    "    # Create interpolated versions the new frame\n",
    "    J_inter = interpolate_region(J)  \n",
    "    \n",
    "    while it < max_iter:\n",
    "        it += 1\n",
    "        \n",
    "        # Get the current window\n",
    "        J_win = J_inter(win_x + d_tot[0], win_y + d_tot[1])\n",
    "\n",
    "        e = estimate_e(template, J_win, Ix, Iy)       \n",
    "        d = np.dot(Zinv, e)\n",
    "        \n",
    "        d_tot = d_tot + d\n",
    "        \n",
    "        if np.hypot(d[0], d[1]) <= min_disp:\n",
    "            # Check if converged\n",
    "            return d_tot\n",
    "        \n",
    "    return d_tot   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_klt(old_image, new_image, input_points, win_size=(21, 21), max_iter=30, min_disp=0.01):\n",
    "    \n",
    "    output_points = []\n",
    "    for (x, y) in input_points:\n",
    "        d = calc_d(old_image, new_image, x, y, win_size, max_iter, min_disp)\n",
    "        output_points.append((x + d[0], y + d[1]))\n",
    "    return output_points"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.3.6 Test Implementation with OpenCV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2\n",
    "\n",
    "def opencv_klt(old_image, new_image, input_points, win_size=(21, 21)):\n",
    "    pts = np.array(input_points ,np.float32)\n",
    "    I2 = old_image.astype(np.uint8)\n",
    "    J2 = new_image.astype(np.uint8)\n",
    "    res = cv2.calcOpticalFlowPyrLK(I2, J2, pts, None, winSize=win_size, maxLevel=0) #prevPts[, nextPts[, status[, err[, winSize[, maxLevel[, criteria[, flags[, minEigThreshold]]]]]]]])\n",
    "    return res[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Experimentations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_img_uint8(path='./view0.png', gray=True):\n",
    "    rgb = Image.open(path)\n",
    "    rgb = np.array(rgb, dtype=np.uint8)\n",
    "    if gray:\n",
    "        return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])\n",
    "    else:\n",
    "        return rgb\n",
    "\n",
    "I_8 = load_img_uint8('./images/view0.png')\n",
    "J_8 = load_img_uint8('./images/view1.png')\n",
    "\n",
    "opencv_klt(I_8, J_8, [(157, 187)], (21, 21))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "calc_klt(I, J, [(157, 187), (150, 10)], (21, 21), max_iter=30)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
